library(Seurat)
library(monocle3)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:Seurat':
##
## Assays
##
## Attaching package: 'monocle3'
## The following objects are masked from 'package:Biobase':
##
## exprs, fData, fData<-, pData, pData<-
library(SingleCellExperiment)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::collapse() masks IRanges::collapse()
## x dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## x dplyr::count() masks matrixStats::count()
## x dplyr::desc() masks IRanges::desc()
## x tidyr::expand() masks S4Vectors::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks S4Vectors::first()
## x dplyr::lag() masks stats::lag()
## x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## x purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## x dplyr::rename() masks S4Vectors::rename()
## x purrr::simplify() masks DelayedArray::simplify()
## x dplyr::slice() masks IRanges::slice()
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
# Load integrated object
seurat_obj <- readRDS("/Volumes/LACIE/RPE_scRNA/PUBLISHED/H9_RPE_IntegratedObject.rds")
# Load colours for clusters
colour_df <- read_csv("~/Documents/Experiments/RPE_scRNA/Analysis/RevisedAnalysis/cluster_colours.csv")
## Parsed with column specification:
## cols(
## Cluster = col_character(),
## Colour = col_character()
## )
colour_vec <- as.character(colour_df$Colour)
names(colour_vec) <- as.character(colour_df$Cluster)
# Retrieve residuals from integrated slot
counts <- seurat_obj[["integrated"]]@scale.data
# Retrieve metadata
metadata <- FetchData(seurat_obj, c("condition", "Months", "cluster",
"seurat_clusters",
"nCount_RNA",
"nFeature_RNA",
"percent.mt",
"percent.rb"))
colnames(metadata)[3] <- "cluster_identity"
# Create gene dataframe
gene_annotation <- data.frame(gene_short_name = rownames(counts),
num_cells_expressed =
Matrix::rowSums(counts > 0),
row.names = rownames(counts))
# Create CDS monocle object
cds <- new_cell_data_set(counts,
cell_metadata = metadata,
gene_metadata = gene_annotation)
## Warning in log(cell_total): NaNs produced
As this is scRNA-seq, we will use PCA to reduce the dimensionality of the data. We will use 30 dimensions, as used in previous analyses. Note that as we are using residuals, normalization and scaling is not necessary.
cds <- preprocess_cds(cds, num_dim = 30, norm_method = "none", scaling = FALSE)
plot_pc_variance_explained(cds)
We are bypassing the align_cds step as we are using harmonized Pearson residuals from the Seurat workflow. The PCA-reduced data is reduced further for plotting, using UMAP.
cds <- reduce_dimension(cds, max_components = 2, reduction_method = "UMAP",
cores = 1, preprocess_method = "PCA", verbose = FALSE)
plot_cells(cds, reduction_method = "UMAP",
color_cells_by = "Months",
show_trajectory_graph = FALSE) +
scale_color_manual(name = "Months", values = c("1" = "#e71837",
"12" = "#00674f")) +
ggtitle("Timepoint", subtitle = "Monocle 3 UMAP")
Perform clustering and learn the trajectory on the resulting UMAP projection.
cds <- cluster_cells(cds)
cds <- learn_graph(cds)
##
|
| | 0%
|
|======================================================================| 100%
cluster_monocle <- plot_cells(cds, reduction_method = "UMAP",
color_cells_by = "cluster",
show_trajectory_graph = TRUE) +
ggtitle("Monocle 3 Clusters", subtitle = "Monocle 3 Trajectory Analysis")
partition_monocle <- plot_cells(cds, reduction = "UMAP",
color_cells_by = "partition",
show_trajectory_graph = TRUE) +
ggtitle("Partitions", subtitle = "Monocle 3 Trajectory Analysis")
cluster_seurat <- plot_cells(cds, reduction = "UMAP",
color_cells_by = "cluster_identity",
show_trajectory_graph = TRUE) +
scale_colour_manual(name = "Cluster", values = colour_vec) +
ggtitle("Clusters", subtitle = "Monocle 3 Trajectory Analysis")
ggarrange(cluster_monocle, partition_monocle, cluster_seurat, ncol = 3)
The analysis shows a branching trajectory.
From previous analyses, we know proliferative cells are still present in the population. We can use these as a starting point for the trajectory.
proliferative_genes <- c("MKI67", "TOP2A", "TPX2", "PTTG1", "PCLAF", "RRM2")
proliferative_monocle_umap <- plot_cells(cds, genes = proliferative_genes,
show_trajectory_graph= TRUE,
label_branch_points = FALSE,
label_leaves = FALSE,
label_roots = FALSE,
label_cell_groups=FALSE,
label_groups_by_cluster = FALSE)
proliferative_monocle_umap
To calculate pseudotime values, we will set the root of the tree at the point closest to the proliferative cells. THis is done via a shiny app, so we will not run this chunk.
# Use shiny app to select node closest to proliferative cells
cds <- order_cells(cds)
We can then review the pseudotime values.
cds <- readRDS("/Volumes/LACIE/RPE_scRNA/PUBLISHED/H9_RPE_Residual_Monocle3_Object.rds")
# Plot pseudotime values over UMAP
pseudotime_plot <- plot_cells(cds, color_cells_by = "pseudotime",
show_trajectory_graph = TRUE,
label_cell_groups = FALSE,
label_leaves = FALSE,
label_branch_points = FALSE,
label_roots = FALSE) +
ggtitle("Clusters", subtitle = "Monocle 3 Trajectory Analysis")
pseudotime_plot
To identify genes that change as a function of pseudotime, we will perform the Moran I’s test via the graph_test function.
trajectory_pr_test_df <- graph_test(cds,
neighbor_graph="principal_graph",
cores=4)
trjaectory_pr_test_df <- trajectory_pr_test_df %>%
rownames_to_column(var = "gene_id")
trajectory_pr_de_genes_df <- trjaectory_pr_test_df %>% filter(q_value < 0.05)
trajectory_pr_de_genes_df <- trajectory_pr_de_genes_df %>%
arrange(desc(abs(morans_I))) %>%
dplyr::select(gene_id, gene_short_name, status, everything())
trajectory_pr_de_genes_df
Using significant genes (FDR < 0.05), we identified co-expression modules at multiple resolutions. The function will select the most stable result.
gene_module_df <- find_gene_modules(cds[trajectory_pr_de_genes_df$gene_id,],
reduction_method = "UMAP",
random_seed = 1, cores = 3,
resolution = 10^seq(-6, -1))
gene_module_df <- gene_module_df %>% arrange(supermodule, module)
gene_module_df
We will review how these modules relate to each cluster by generating a heatmap.
cell_group_df <- tibble::tibble(cell = rownames(colData(cds)),
cell_group = colData(cds)$cluster_identity)
agg_mat <- aggregate_gene_expression(cds, gene_module_df, cell_group_df)
## Warning in normalized_counts(cds, norm_method = norm_method, pseudocount =
## pseudocount): NaNs produced
row.names(agg_mat) <- stringr::str_c("Module ", row.names(agg_mat))
agg_mat <- as.matrix(agg_mat)
agg_mat <- scale(agg_mat)
cols = rev(colorRampPalette(RColorBrewer::brewer.pal(11,"RdYlBu"))(100))
breaks = seq(0, 1, length=101)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.2.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## ========================================
heatmap <- Heatmap(agg_mat,
cluster_rows = function(m) hclust(dist(agg_mat), method = "ward.D2"),
row_names_gp = gpar(fontsize = 8),
col = cols)
heatmap